// SKNotes - a note taking and gathering tool
// Copyright (C) 2009  Stefan Kueng
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

namespace IfacesEnumsStructsClasses
{
    using System;
    using System.Text;
    using System.Runtime.InteropServices;

    /// <summary>
    /// Fully qualified property specifier: the GUID of a property set paired
    /// with a <see cref="PROPSPEC"/> naming one property inside that set.
    /// Used as the <c>attribute</c> member of <see cref="STAT_CHUNK"/>.
    /// </summary>
    /// <remarks>
    /// Laid out sequentially so the field order seen by unmanaged code is the
    /// declaration order below; do not reorder the fields.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct FULLPROPSPEC
    {
        /// <summary>
        /// GUID of the property set the property belongs to.
        /// </summary>
        public Guid guidPropSet;

        /// <summary>
        /// Specifier (string name or PROPID) of the property within the set.
        /// </summary>
        public PROPSPEC psProperty;
    }

    /// <summary>
    /// Describes a region inside a chunk; this is the position argument taken
    /// by <see cref="IFilter.BindRegion"/>.
    /// </summary>
    /// <remarks>
    /// Laid out sequentially so the field order seen by unmanaged code is the
    /// declaration order below; do not reorder the fields.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct FILTERREGION
    {
        /// <summary>
        /// Identifier of the chunk the region belongs to.
        /// </summary>
        public int idChunk;

        /// <summary>
        /// Start of the region. Presumably an offset in wide characters from
        /// the beginning of the chunk, per the native FILTERREGION
        /// documentation - confirm before relying on it.
        /// </summary>
        public int cwcStart;

        /// <summary>
        /// Extent of the region. Presumably a count of wide characters, per
        /// the native FILTERREGION documentation - confirm before relying on it.
        /// </summary>
        public int cwcExtent;
    }

    /// <summary>
    /// Managed mirror of the native PROPSPEC structure: a kind discriminator
    /// followed by a union that holds either a PROPID or a pointer to a
    /// wide-character property name.
    /// </summary>
    /// <remarks>
    /// The native union contains a pointer and is therefore pointer-aligned:
    /// it starts at offset 4 in a 32-bit process but at offset 8 in a 64-bit
    /// process. A hard-coded <c>FieldOffset(4)</c> is only correct for 32-bit,
    /// so the union is modelled as a single pointer-sized slot
    /// (<see cref="lpwstr"/>) in a sequential layout, which the marshaller
    /// aligns correctly on both platforms. <see cref="propid"/> is a view of
    /// the low 32 bits of that slot, which is where the PROPID lives on the
    /// little-endian platforms Windows runs on.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct PROPSPEC
    {
        /// <summary>
        /// Discriminator for the union: 0 - string used; 1 - PROPID.
        /// </summary>
        public int ulKind;

        /// <summary>
        /// The pointer-sized union slot. When <see cref="ulKind"/> is 0 this is
        /// the pointer to the wide-character property name; when it is 1 only
        /// the low 32 bits are meaningful (see <see cref="propid"/>).
        /// </summary>
        public IntPtr lpwstr;

        /// <summary>
        /// The property identifier stored in the union when
        /// <see cref="ulKind"/> is 1. Reads the low 32 bits of the union slot;
        /// writing replaces the whole slot with the given identifier.
        /// </summary>
        public int propid
        {
            // The upper half of the slot is undefined when a PROPID is stored,
            // so truncate instead of letting ToInt32() throw on 64-bit.
            get { return unchecked((int)lpwstr.ToInt64()); }
            set { lpwstr = new IntPtr(value); }
        }
    }

    /// <summary>
    /// Flags a filter hands back from <c>IFilter::Init</c> to tell the caller
    /// about additional properties.
    /// </summary>
    [Flags]
    public enum IFILTER_FLAGS
    {
        /// <summary>
        /// Additional properties should be located by the caller through the
        /// IPropertySetStorage and IPropertyStorage interfaces. While this
        /// flag is set, IFilter should not return properties that are
        /// available through COM enumerators.
        /// </summary>
        IFILTER_FLAGS_OLE_PROPERTIES = 0x1
    }

    /// <summary>
    /// Bit flags passed to <c>IFilter::Init</c> that control how the filter
    /// normalises text and which properties it emits.
    /// </summary>
    [Flags]
    public enum IFILTER_INIT
    {
        /// <summary>
        /// No flags set.
        /// </summary>
        NONE = 0,

        /// <summary>
        /// Mark paragraph breaks with the Unicode PARAGRAPH SEPARATOR (0x2029).
        /// </summary>
        CANON_PARAGRAPHS = 1 << 0,

        /// <summary>
        /// Replace soft returns (for example the newline character in
        /// Microsoft Word) with hard returns, i.e. the LINE SEPARATOR (0x2028).
        /// Existing hard returns can be doubled. A carriage return (0x000D),
        /// a line feed (0x000A), or the two in combination count as a hard
        /// return. The purpose is to allow pattern-expression matches against
        /// the line breaks as observed.
        /// </summary>
        HARD_LINE_BREAKS = 1 << 1,

        /// <summary>
        /// Canonicalise hyphens. Word processors have hyphen forms that the
        /// host character set cannot represent, such as optional hyphens
        /// (shown only at the end of a line) and non-breaking hyphens. With
        /// this flag optional hyphens become nulls and non-breaking hyphens
        /// become normal hyphens (0x2010) or HYPHEN-MINUSES (0x002D).
        /// </summary>
        CANON_HYPHENS = 1 << 2,

        /// <summary>
        /// Canonicalise spaces, in the same way CANON_HYPHENS canonicalises
        /// hyphens: every special space character, such as a non-breaking
        /// space, becomes the standard space character (0x0020).
        /// </summary>
        CANON_SPACES = 1 << 3,

        /// <summary>
        /// The client wants text split into chunks representing internal
        /// value-type properties.
        /// </summary>
        APPLY_INDEX_ATTRIBUTES = 1 << 4,

        /// <summary>
        /// The client wants text split into chunks representing properties
        /// determined during the indexing process.
        /// </summary>
        APPLY_CRAWL_ATTRIBUTES = 1 << 8,

        /// <summary>
        /// Emit any properties that neither APPLY_INDEX_ATTRIBUTES nor
        /// APPLY_CRAWL_ATTRIBUTES covers.
        /// </summary>
        APPLY_OTHER_ATTRIBUTES = 1 << 5,

        /// <summary>
        /// Optimise the filter for indexing: the client calls IFilter::Init
        /// only once and never calls IFilter::BindRegion, so a chunk is never
        /// accessed both before and after another chunk.
        /// </summary>
        INDEXING_ONLY = 1 << 6,

        /// <summary>
        /// Text extraction must recurse into all objects linked from the
        /// document. When a link is unavailable, the IFilter::GetChunk call
        /// that would have produced the link's first chunk should return
        /// FILTER_E_LINK_UNAVAILABLE.
        /// </summary>
        SEARCH_LINKS = 1 << 7,

        /// <summary>
        /// The content indexing process can return property values set by
        /// the filter.
        /// </summary>
        FILTER_OWNED_VALUE_OK = 1 << 9
    }

    /// <summary>
    /// Description of a chunk, filled in by <c>IFilter::GetChunk</c>.
    /// </summary>
    /// <remarks>
    /// The structure is marshalled to and from unmanaged code, so the layout
    /// is declared explicitly (as on the other interop structures in this
    /// file) and the field order must not be changed.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct STAT_CHUNK
    {
        /// <summary>
        /// The chunk identifier. Chunk identifiers must be unique for the
        /// current instance of the IFilter interface and must be in ascending
        /// order. The order in which chunks are numbered should correspond to
        /// the order in which they appear in the source document. Some search
        /// engines can take advantage of the proximity of chunks of various
        /// properties; if so, the order in which chunks with different
        /// properties are emitted is important to the search engine.
        /// </summary>
        public int idChunk;

        /// <summary>
        /// The type of break that separates the previous chunk from the
        /// current chunk. Values are from the CHUNK_BREAKTYPE enumeration.
        /// </summary>
        [MarshalAs(UnmanagedType.U4)]
        public CHUNK_BREAKTYPE breakType;

        /// <summary>
        /// Flags indicating whether this chunk contains a text-type or a
        /// value-type property. Flag values are taken from the CHUNKSTATE
        /// enumeration. If the CHUNK_TEXT flag is set, IFilter::GetText should
        /// be used to retrieve the contents of the chunk as a series of words.
        /// If the CHUNK_VALUE flag is set, IFilter::GetValue should be used to
        /// retrieve the value and treat it as a single property value. If the
        /// filter dictates that the same content be treated as both text and
        /// as a value, the content should be emitted twice, in two different
        /// chunks, each with one flag set.
        /// </summary>
        [MarshalAs(UnmanagedType.U4)]
        public CHUNKSTATE flags;

        /// <summary>
        /// The language and sublanguage associated with a chunk of text.
        /// Chunk locale is used by document indexers to perform proper word
        /// breaking of text. If the chunk is neither text-type nor a
        /// value-type with data type VT_LPWSTR, VT_LPSTR or VT_BSTR, this
        /// field is ignored.
        /// </summary>
        public int locale;

        /// <summary>
        /// The property to be applied to the chunk. If a filter requires that
        /// the same text have more than one property, it needs to emit the
        /// text once for each property, in separate chunks.
        /// </summary>
        public FULLPROPSPEC attribute;

        /// <summary>
        /// The ID of the source of a chunk. The value depends on the nature
        /// of the chunk:
        /// if the chunk is a text-type property, it must be the same as the
        /// value of the idChunk member;
        /// if the chunk is an internal value-type property derived from
        /// textual content, it is the chunk ID of the text-type chunk from
        /// which it is derived;
        /// if the filter attributes specify to return only internal
        /// value-type properties, there is no content chunk from which to
        /// derive the current property, and the value must be set to zero,
        /// which is an invalid chunk.
        /// </summary>
        public int idChunkSource;

        /// <summary>
        /// The offset from which the source text for a derived chunk starts
        /// in the source chunk.
        /// </summary>
        public int cwcStartSource;

        /// <summary>
        /// The length in characters of the source text from which the current
        /// chunk was derived. A zero value signifies character-by-character
        /// correspondence between the source text and the derived text. A
        /// nonzero value means that no such direct correspondence exists.
        /// </summary>
        public int cwcLenSource;
    }

    /// <summary>
    /// The kinds of break that can separate one chunk of text produced by the
    /// filter from the chunk before it.
    /// </summary>
    public enum CHUNK_BREAKTYPE
    {
        /// <summary>
        /// No break: the current chunk is glued directly onto the previous one.
        /// </summary>
        CHUNK_NO_BREAK = 0,

        /// <summary>
        /// Word break between this chunk and the previous chunk with the same
        /// attribute. Keep use of CHUNK_EOW to a minimum: word breaking is
        /// language-dependent and is best left to the search engine.
        /// </summary>
        CHUNK_EOW = 1,

        /// <summary>
        /// Sentence break between this chunk and the previous chunk with the
        /// same attribute.
        /// </summary>
        CHUNK_EOS = 2,

        /// <summary>
        /// Paragraph break between this chunk and the previous chunk with the
        /// same attribute.
        /// </summary>
        CHUNK_EOP = 3,

        /// <summary>
        /// Chapter break between this chunk and the previous chunk with the
        /// same attribute.
        /// </summary>
        CHUNK_EOC = 4
    }


    /// <summary>
    /// Bit flags describing what kind of property a chunk carries; stored in
    /// the <c>flags</c> member of <see cref="STAT_CHUNK"/>.
    /// </summary>
    /// <remarks>
    /// The members are distinct bits, so the enumeration is marked with
    /// <see cref="FlagsAttribute"/> like the other bit-mask enumerations in
    /// this file; combined values then format as a list of member names
    /// instead of a bare number.
    /// </remarks>
    [Flags]
    public enum CHUNKSTATE
    {
        /// <summary>
        /// The current chunk is a text-type property.
        /// </summary>
        CHUNK_TEXT = 0x1,

        /// <summary>
        /// The current chunk is a value-type property.
        /// </summary>
        CHUNK_VALUE = 0x2,

        /// <summary>
        /// Reserved.
        /// </summary>
        CHUNK_FILTER_OWNED_VALUE = 0x4
    }

    /// <summary>
    /// HRESULT values returned by the <see cref="IFilter"/> methods: the
    /// generic COM codes plus the filter-specific FILTER_* codes.
    /// </summary>
    /// <remarks>
    /// The underlying type is <c>uint</c> so the failure codes (high bit set)
    /// can be written as plain hexadecimal literals; that is also why the
    /// type is not CLS-compliant.
    /// </remarks>
    [CLSCompliantAttribute(false)]
    public enum IFilterReturnCode : uint
    {
        /// <summary>
        /// Success
        /// </summary>
        S_OK = 0,
        /// <summary>
        /// The function was denied access to the filter file.
        /// </summary>
        E_ACCESSDENIED = 0x80070005,
        /// <summary>
        /// The function encountered an invalid handle,
        /// probably due to a low-memory situation.
        /// </summary>
        E_HANDLE = 0x80070006,
        /// <summary>
        /// The function received an invalid parameter.
        /// </summary>
        E_INVALIDARG = 0x80070057,
        /// <summary>
        /// Out of memory
        /// </summary>
        E_OUTOFMEMORY = 0x8007000E,
        /// <summary>
        /// Not implemented
        /// </summary>
        E_NOTIMPL = 0x80004001,
        /// <summary>
        /// Unspecified failure. The Win32 value is 0x80004005; 0x80000008 is
        /// the legacy 16-bit definition and is never returned by Win32 COM
        /// components.
        /// </summary>
        E_FAIL = 0x80004005,
        /// <summary>
        /// File not filtered due to password protection
        /// </summary>
        FILTER_E_PASSWORD = 0x8004170B,
        /// <summary>
        /// The document format is not recognised by the filter
        /// </summary>
        FILTER_E_UNKNOWNFORMAT = 0x8004170C,
        /// <summary>
        /// No text in current chunk
        /// </summary>
        FILTER_E_NO_TEXT = 0x80041705,
        /// <summary>
        /// No more chunks of text available in object
        /// </summary>
        FILTER_E_END_OF_CHUNKS = 0x80041700,
        /// <summary>
        /// No more text available in chunk
        /// </summary>
        FILTER_E_NO_MORE_TEXT = 0x80041701,
        /// <summary>
        /// No more property values available in chunk
        /// </summary>
        FILTER_E_NO_MORE_VALUES = 0x80041702,
        /// <summary>
        /// Unable to access object
        /// </summary>
        FILTER_E_ACCESS = 0x80041703,
        /// <summary>
        /// Moniker doesn't cover entire region
        /// </summary>
        FILTER_W_MONIKER_CLIPPED = 0x00041704,
        /// <summary>
        /// Unable to bind IFilter for embedded object
        /// </summary>
        FILTER_E_EMBEDDING_UNAVAILABLE = 0x80041707,
        /// <summary>
        /// Unable to bind IFilter for linked object
        /// </summary>
        FILTER_E_LINK_UNAVAILABLE = 0x80041708,
        /// <summary>
        /// This is the last text in the current chunk
        /// </summary>
        FILTER_S_LAST_TEXT = 0x00041709,
        /// <summary>
        /// This is the last value in the current chunk
        /// </summary>
        FILTER_S_LAST_VALUES = 0x0004170A
    }

    /// <summary>
    /// COM import of the IFilter interface, used to pull text and property
    /// values out of a document chunk by chunk.
    /// </summary>
    /// <remarks>
    /// The methods map onto the COM vtable in declaration order, so they must
    /// stay in this order. Every method is marked PreserveSig: the HRESULT is
    /// returned to the caller instead of being turned into an exception.
    /// </remarks>
    [ComImport]
    [Guid("89BCB740-6119-101A-BCB7-00DD010655AF")]
    [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
    [CLSCompliant(false)]
    public interface IFilter
    {
        /// <summary>
        /// Initializes a filtering session (IFilter::Init).
        /// </summary>
        /// <param name="grfFlags">
        /// [in] Flag settings from the IFILTER_INIT enumeration controlling
        /// text standardization, property output, embedding scope, and
        /// IFilter access patterns.
        /// </param>
        /// <param name="cAttributes">
        /// [in] The size of the attributes array. When nonzero, cAttributes
        /// takes precedence over attributes specified in grfFlags. If no
        /// attribute flags are specified and cAttributes is zero, the default
        /// is given by the PSGUID_STORAGE storage property set (date and time
        /// of the last write to the file, size, and so on) and by the
        /// PID_STG_CONTENTS 'contents' property, which maps to the main
        /// contents of the file.
        /// </param>
        /// <param name="aAttributes">
        /// [in] Array of pointers to FULLPROPSPEC structures for the requested
        /// properties. When cAttributes is nonzero, only the properties in
        /// aAttributes are returned.
        /// </param>
        /// <param name="pdwFlags">
        /// [out] Information about additional properties available to the
        /// caller; from the IFILTER_FLAGS enumeration.
        /// </param>
        /// <returns>The HRESULT of the call.</returns>
        [PreserveSig]
        IFilterReturnCode Init(
            IFILTER_INIT grfFlags,
            int cAttributes,
            IntPtr aAttributes,
            out IFILTER_FLAGS pdwFlags);

        /// <summary>
        /// Positions the filter at the beginning of the next chunk - or at
        /// the first chunk on the first call - and returns a description of
        /// that chunk (IFilter::GetChunk).
        /// </summary>
        /// <param name="pStat">[out] Description of the current chunk.</param>
        /// <returns>The HRESULT of the call.</returns>
        [PreserveSig]
        IFilterReturnCode GetChunk(out STAT_CHUNK pStat);

        /// <summary>
        /// Retrieves text (text-type properties) from the current chunk,
        /// which must have a CHUNKSTATE enumeration value of CHUNK_TEXT
        /// (IFilter::GetText).
        /// </summary>
        /// <param name="pcwcBuffer">
        /// [in/out] On entry, the size of the awcBuffer array in wide/Unicode
        /// characters. On exit, the number of Unicode characters written to
        /// awcBuffer. Note that this value is not the number of bytes in the
        /// buffer.
        /// </param>
        /// <param name="awcBuffer">
        /// Text retrieved from the current chunk. Do not terminate the buffer
        /// with a character.
        /// </param>
        /// <returns>The HRESULT of the call.</returns>
        [PreserveSig]
        IFilterReturnCode GetText(
            ref uint pcwcBuffer,
            [Out, MarshalAs(UnmanagedType.LPArray)] char[] awcBuffer);

        /// <summary>
        /// Retrieves a value (internal value-type property) from a chunk,
        /// which must have a CHUNKSTATE enumeration value of CHUNK_VALUE
        /// (IFilter::GetValue).
        /// </summary>
        /// <param name="PropVal">
        /// Pointer to the PROPVARIANT structure, which is allocated with
        /// CoTaskMemAlloc. Some PROPVARIANT structures contain pointers,
        /// which can be freed by calling the PropVariantClear function; it is
        /// up to the caller of GetValue to call PropVariantClear.
        /// </param>
        /// <returns>The raw HRESULT of the call.</returns>
        [PreserveSig]
        int GetValue(ref IntPtr PropVal);

        /// <summary>
        /// Retrieves an interface representing the specified portion of the
        /// object (IFilter::BindRegion). Currently reserved for future use.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the native declaration appears to take the
        /// FILTERREGION by value and to return the interface through an
        /// out-only pointer; confirm against the native header before
        /// actually calling this method.
        /// </remarks>
        /// <param name="origPos">The region to bind to.</param>
        /// <param name="riid">Identifier of the requested interface.</param>
        /// <param name="ppunk">Receives the requested interface.</param>
        /// <returns>The raw HRESULT of the call.</returns>
        [PreserveSig]
        int BindRegion(
            ref FILTERREGION origPos,
            ref Guid riid,
            ref object ppunk);
    }


}
